In [1]:
#NumPy is a Python Library used to perform wide variety of Mathematical Operations on Arrays.
import numpy as np
#Pandas is a Python Library used to analyze big data and draw Conclusions based on Statistical Theories.
import pandas as pd
#matplotlib.pyplot contains functions that make matplotlib work like MATLAB and
#each pyplot function makes some changes in the figure.
import matplotlib.pyplot as plt
#plotly.express contains functions that can create Entire Figures.
import plotly.express as px
import plotly.graph_objects as go
#init_notebook_mode prepares our Jupyter Notebook to display Plotly graphs right within the notebook itself.
from plotly.offline import iplot, plot, init_notebook_mode
from plotly.subplots import make_subplots
#%matplotlib inline makes any Matplotlib plots to automatically be embedded within the notebook, making it easier to view and analyze them as we work.
%matplotlib inline
from wordcloud import WordCloud , ImageColorGenerator
import seaborn as sns

Importing the Dataset.¶

In [3]:
#Loading the Dataset.
# The CSV location can be overridden through the INDIAN_FOOD_CSV environment
# variable, so the notebook is not tied to a single machine's folder layout.
# When the variable is not set, the original absolute path is used unchanged.
import os

DATA_PATH = os.environ.get(
    "INDIAN_FOOD_CSV",
    "C:\\Users\\Admin\\Desktop\\Madhu\\Anaconda-Jupyter\\Indian Food Analysis Jupyter Project\\Raw Data.csv",
)
Data=pd.read_csv(DATA_PATH)
In [4]:
# Preview the first five rows of the dataset.
Data.head(n=5)
Out[4]:
Name Ingredients Diet Preparation_Time Cooking_Time Flavor Course_Name State Region
0 Balu shahi Maida flour, yogurt, oil, sugar Vegetarian 45 25 Sweet Dessert West Bengal East
1 Boondi Gram flour, ghee, sugar Vegetarian 80 30 Sweet Dessert Rajasthan West
2 Gajar ka halwa Carrots, milk, sugar, ghee, cashews, raisins Vegetarian 15 60 Sweet Dessert Punjab North
3 Ghevar Flour, ghee, kewra, milk, clarified butter, su... Vegetarian 15 30 Sweet Dessert Rajasthan West
4 Gulab jamun Milk powder, plain flour, baking powder, ghee,... Vegetarian 15 40 Sweet Dessert West Bengal East
In [5]:
# Preview the last five rows of the dataset.
Data.tail(n=5)
Out[5]:
Name Ingredients Diet Preparation_Time Cooking_Time Flavor Course_Name State Region
250 Til Pitha Glutinous rice, black sesame seeds, gur Vegetarian 5 30 Sweet Dessert Assam North East
251 Bebinca Coconut milk, egg yolks, clarified butter, all... Vegetarian 20 60 Sweet Dessert Goa West
252 Shufta Cottage cheese, dry dates, dried rose petals, ... Vegetarian 20 45 Sweet Dessert Jammu & Kashmir North
253 Mawa Bati Milk powder, dry fruits, arrowroot powder, all... Vegetarian 20 45 Sweet Dessert Madhya Pradesh Central
254 Pinaca Brown rice, fennel seeds, grated coconut, blac... Vegetarian 20 45 Sweet Dessert Goa West
In [6]:
#To get Random 5 Sample Rows from the Dataset.
# A fixed random_state makes the sample reproducible: re-running the notebook
# from a fresh kernel shows the same five rows every time.
Data.sample(n=5, random_state=42)
Out[6]:
Name Ingredients Diet Preparation_Time Cooking_Time Flavor Course_Name State Region
23 Cham cham Flour, cream, sugar, saffron, lemon juice, coc... Vegetarian 40 60 Sweet Dessert West Bengal East
215 Thepla Chickpea flour, methi leaves, jowar flour, whe... Vegetarian 15 30 Spicy Snack Gujarat West
239 Koldil Chicken Banana flower, chicken, green chili, mustard o... Non-Vegetarian 20 30 Spicy Main Course Assam North East
36 Adhirasam Rice flour, jaggery, ghee, vegetable oil, elachi Vegetarian 10 50 Sweet Dessert West Bengal East
28 Mihidana Besan flour, sugar, ghee Vegetarian 15 30 Sweet Dessert West Bengal East
In [7]:
# List the column labels of the dataset (returned as a pandas Index).
Data.columns
Out[7]:
Index(['Name', 'Ingredients', 'Diet', 'Preparation_Time', 'Cooking_Time',
       'Flavor', 'Course_Name', 'State', 'Region'],
      dtype='object')

Checking the Dataset.¶

In [9]:
# Print a concise summary of the DataFrame: the index range, each column's
# non-null count and dtype, and the approximate memory usage.
Data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 255 entries, 0 to 254
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Name              255 non-null    object
 1   Ingredients       255 non-null    object
 2   Diet              255 non-null    object
 3   Preparation_Time  255 non-null    int64 
 4   Cooking_Time      255 non-null    int64 
 5   Flavor            255 non-null    object
 6   Course_Name       255 non-null    object
 7   State             255 non-null    object
 8   Region            255 non-null    object
dtypes: int64(2), object(7)
memory usage: 18.1+ KB
In [10]:
# Shape of the dataset as a (number of rows, number of columns) tuple.
Data.shape
Out[10]:
(255, 9)
In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns, here Preparation_Time and Cooking_Time.
Data.describe()
Out[11]:
Preparation_Time Cooking_Time
count 255.000000 255.000000
mean 33.576471 38.992157
std 71.769323 46.679759
min 5.000000 2.000000
25% 10.000000 25.000000
50% 15.000000 30.000000
75% 20.000000 45.000000
max 500.000000 720.000000
In [12]:
# Count the missing (null/NaN) entries in every column of the dataset.
Data.isna().sum()
Out[12]:
Name                0
Ingredients         0
Diet                0
Preparation_Time    0
Cooking_Time        0
Flavor              0
Course_Name         0
State               0
Region              0
dtype: int64

We have no Null Values.

Analysing and Visualizing the Dataset.¶

1)(i) To get the Count of Vegetarian and Non-Vegetarian Dishes.

In [16]:
# Tally how many dishes fall under each Diet value and turn the resulting
# Series into a two-column DataFrame for display and plotting.
Veg_NonVeg = Data['Diet'].value_counts().reset_index()
Veg_NonVeg
Out[16]:
Diet count
0 Vegetarian 226
1 Non-Vegetarian 29

1)(ii) To Plot the Graph.

In [18]:
# Name the two columns explicitly so the pie chart can refer to them.
Veg_NonVeg.columns = ['Diet', 'Count']

# Build the pie chart (one slice per diet type) and size it in a single
# expression; update_layout returns the same figure object.
fig = px.pie(
    data_frame=Veg_NonVeg,
    names='Diet',
    values='Count',
    title='Proportion of Vegetarian and Non-Vegetarian Dishes',
    color_discrete_sequence=['red', 'black'],
).update_layout(width=700, height=600)
fig.show()

2)(i) To get the List of all Sweet Dishes.

In [20]:
# Keep only the rows whose Flavor is exactly 'Sweet'.
Sweet_Data = Data.loc[Data['Flavor'].eq('Sweet')]
Sweet_Data
Out[20]:
Name Ingredients Diet Preparation_Time Cooking_Time Flavor Course_Name State Region
0 Balu shahi Maida flour, yogurt, oil, sugar Vegetarian 45 25 Sweet Dessert West Bengal East
1 Boondi Gram flour, ghee, sugar Vegetarian 80 30 Sweet Dessert Rajasthan West
2 Gajar ka halwa Carrots, milk, sugar, ghee, cashews, raisins Vegetarian 15 60 Sweet Dessert Punjab North
3 Ghevar Flour, ghee, kewra, milk, clarified butter, su... Vegetarian 15 30 Sweet Dessert Rajasthan West
4 Gulab jamun Milk powder, plain flour, baking powder, ghee,... Vegetarian 15 40 Sweet Dessert West Bengal East
... ... ... ... ... ... ... ... ... ...
250 Til Pitha Glutinous rice, black sesame seeds, gur Vegetarian 5 30 Sweet Dessert Assam North East
251 Bebinca Coconut milk, egg yolks, clarified butter, all... Vegetarian 20 60 Sweet Dessert Goa West
252 Shufta Cottage cheese, dry dates, dried rose petals, ... Vegetarian 20 45 Sweet Dessert Jammu & Kashmir North
253 Mawa Bati Milk powder, dry fruits, arrowroot powder, all... Vegetarian 20 45 Sweet Dessert Madhya Pradesh Central
254 Pinaca Brown rice, fennel seeds, grated coconut, blac... Vegetarian 20 45 Sweet Dessert Goa West

90 rows × 9 columns

2)(ii) To get the List of Sweet-Flavoured Dishes other than Desserts.

In [22]:
# From the sweet-flavoured dishes, keep those whose course is NOT 'Dessert'
# (i.e. sweet dishes served as some other course).
Desert_Sweet_Data = Sweet_Data.loc[Sweet_Data['Course_Name'].ne('Dessert')]
Desert_Sweet_Data
Out[22]:
Name Ingredients Diet Preparation_Time Cooking_Time Flavor Course_Name State Region
46 Obbattu holige Maida flour, turmeric, coconut, chickpeas, jag... Vegetarian 180 60 Sweet Main Course Karnataka South
85 Dal makhani Red kidney beans, urad dal, cream, garam masal... Vegetarian 10 60 Sweet Main Course Punjab North
155 Puttu Brown rice flour, sugar, grated coconut Vegetarian 495 40 Sweet Main Course Kerala South
176 Copra paak Condensed milk, nestle cream, coconut ice, red... Vegetarian 20 30 Sweet Main Course Gujarat West
243 Mishti Chholar Dal Chana dal, fresh coconut, ginger, cinnamon, ra... Vegetarian 10 30 Sweet Main Course West Bengal East

3)(i) To get the List of all the Flavours.

In [24]:
# Count the dishes per Flavor value and show the result as a DataFrame.
Flavor_Data = Data['Flavor'].value_counts().reset_index()
Flavor_Data
Out[24]:
Flavor count
0 Spicy 143
1 Sweet 90
2 Normal 13
3 Bitter 6
4 Sour 3

3)(ii) To Plot the Graph.

In [26]:
# Count the dishes per flavour; rename_axis/reset_index(name=...) give the two
# columns the explicit names ('Flavour', 'Food') used by the plot below.
Flavor_Data=Data['Flavor'].value_counts().rename_axis('Flavour').reset_index(name='Food')
plt.figure(figsize=(8,5))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title("No of Foods according to the Flavours",fontsize=30)
Graph=sns.barplot(x="Flavour", y="Food", data=Flavor_Data, hue="Flavour", palette="flare", dodge=False, edgecolor='black')
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10) # To get LABEL on EACH BAR.
# Set the final axis labels together with their font sizes in one place
# (previously the sizes were attached to empty placeholder labels).
Graph.set_xlabel('Flavor', fontsize=25)
Graph.set_ylabel('No of Foods', fontsize=20)
# plt.show() takes no positional arguments (its only parameter is the
# keyword-only `block`), so the Axes object must not be passed to it.
plt.show()
No description has been provided for this image

4)(i) To get the List of Top 10 Foods with LOW Cooking Time.

In [28]:
# Take the three columns of interest and order the dishes from the shortest
# to the longest cooking time.
Cooking_Time = (
    Data[['Cooking_Time', 'Name', 'Flavor']]
    .sort_values(['Cooking_Time'], ascending=True)
)
# The first ten rows of the sorted frame are the quickest dishes to cook.
Top_10 = Cooking_Time.iloc[:10]
Top_10
Out[28]:
Cooking_Time Name Flavor
109 2 Pani puri Spicy
11 5 Lassi Sweet
147 5 Papadum Spicy
111 5 Papad Spicy
212 6 Lilva Kachori Spicy
78 10 Chapati Normal
190 10 Keri no ras Sour
169 10 Bajri no rotlo Spicy
195 10 Koshimbir Spicy
214 10 Khichu Spicy

4)(ii) To Plot the Graph.

In [30]:
# Bar chart of the ten dishes currently held in Top_10, one bar per dish.
plt.figure(figsize=(12,5))
# The title states which ranking is shown, so this figure cannot be confused
# with the highest-cooking-time chart when the notebook is skimmed.
plt.title('Top 10 Dishes with the Lowest Cooking Time', fontsize=25)
plt.xticks(fontsize=9, rotation=45, ha='right')
plt.yticks(fontsize=12)
plt.ylabel('Cooking Time in Minutes', fontsize=15)
plt.xlabel('Name of the Dish', fontsize=15)
Graph = sns.barplot(y='Cooking_Time', x='Name', data=Top_10, palette='Set2', hue='Name', dodge=False, edgecolor='black')
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10)  # Add labels to each bar
plt.show()
No description has been provided for this image

5)(i) To get the List of Top 10 Foods with HIGH Cooking Time.

In [32]:
# Take the three columns of interest and order the dishes from the longest
# to the shortest cooking time.
Cooking_Time = (
    Data[['Cooking_Time', 'Name', 'Flavor']]
    .sort_values(['Cooking_Time'], ascending=False)
)
# The first ten rows of the sorted frame are the slowest dishes to cook.
Top_10 = Cooking_Time.iloc[:10]
Top_10
Out[32]:
Cooking_Time Name Flavor
62 720 Shrikhand Sweet
27 120 Malapua Sweet
114 120 Pindi chana Spicy
75 120 Biryani Spicy
115 90 Rajma chaval Spicy
128 90 Dosa Spicy
142 90 Kuzhakkattai Spicy
144 90 Masala Dosa Spicy
83 90 Daal baati churma Spicy
130 90 Idli Spicy

5)(ii) To Plot the Graph.

In [34]:
# Bar chart of the ten dishes currently held in Top_10, one bar per dish.
plt.figure(figsize=(12,5))
# The title states which ranking is shown, so this figure cannot be confused
# with the lowest-cooking-time chart when the notebook is skimmed.
plt.title('Top 10 Dishes with the Highest Cooking Time', fontsize=25)
plt.xticks(fontsize=9, rotation=45, ha='right')
plt.yticks(fontsize=12)
plt.ylabel('Cooking Time in Minutes', fontsize=15)
plt.xlabel('Name of the Dish', fontsize=15)
Graph = sns.barplot(y='Cooking_Time', x='Name', data=Top_10, palette='autumn', hue='Name', dodge=False, edgecolor='black')
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10)  # Add labels to each bar
plt.show()
No description has been provided for this image

6)(i) To get the Number of Foods in each Region of India.

In [36]:
# Count the dishes per Region value and show the result as a DataFrame.
Regions = Data['Region'].value_counts().reset_index()
Regions
Out[36]:
Region count
0 West 74
1 South 59
2 North 53
3 North East 35
4 East 31
5 Central 3

6)(ii) To Plot the Graph

In [38]:
# Name the two columns explicitly so the pie chart can refer to them.
Regions.columns = ['Region', 'count']

plt.figure(figsize=(10, 8))
# One wedge per region, sized by its dish count and annotated with its
# percentage share (one decimal place).
plt.pie(
    x=Regions['count'],
    labels=Regions['Region'],
    colors=sns.color_palette('Set2'),
    autopct='%1.1f%%',
    textprops={'fontsize': 12},
)
plt.title('Proportion of Dishes in Different Regions', fontsize=18)
plt.show()
No description has been provided for this image

7)(i) To get the Number of Foods in each State of India.

In [40]:
# Count the dishes per state.
# rename_axis + reset_index(name=...) pin the column names to 'State' and
# 'count' explicitly. A bare value_counts().reset_index() only yields these
# names on pandas >= 2.0 (older versions produce 'index' and 'State'), and the
# bar plot of this table looks the columns up by exactly these names.
States = Data['State'].value_counts().rename_axis('State').reset_index(name='count')
States
Out[40]:
State count
0 Gujarat 35
1 Punjab 32
2 Maharashtra 30
3 West Bengal 24
4 Assam 21
5 Tamil Nadu 20
6 Meghalaya 12
7 Himachal Pradesh 12
8 Andhra Pradesh 10
9 Uttar Pradesh 9
10 Kerala 8
11 Odisha 7
12 Karnataka 6
13 Rajasthan 6
14 Telangana 5
15 Bihar 3
16 Goa 3
17 Manipur 2
18 Jammu & Kashmir 2
19 Madhya Pradesh 2
20 Uttarakhand 1
21 Tripura 1
22 Nagaland 1
23 Delhi 1
24 Chhattisgarh 1
25 Haryana 1

7)(ii) To Plot the Graph.

In [42]:
# Bar chart with one bar per state, its height being the number of dishes
# recorded for that state in the States table.
plt.figure(figsize=(12, 5))
plt.title('No of Dishes in Different States', fontsize=25)

# Tick styling: small, slanted state names so the labels do not overlap.
plt.xticks(fontsize=9, rotation=45, ha='right')
plt.yticks(fontsize=12)

# Axis captions.
plt.ylabel('No of Dishes', fontsize=15)
plt.xlabel('States', fontsize=15)

Graph = sns.barplot(
    data=States,
    x='State',
    y='count',
    hue='State',
    palette='PuRd_r',
    dodge=False,
    edgecolor='black',
)

# Write the dish count on top of every bar.
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10)
plt.show()
No description has been provided for this image
In [ ]: